package io.renren.pageprocessor;

import org.springframework.stereotype.Component;

import io.renren.base.AbstractPageProcessor;

import us.codecraft.webmagic.Page;

/**
 * Page processor for www.yuanjilu.xyz.
 *
 * <p>List pages ({@link #URL_LIST}) are only used to discover further list pages and
 * article pages; every other page that reaches {@link #process(Page)} is treated as an
 * article and has its fields extracted.
 */
@Component
public class www_yuanjilu_xyz extends AbstractPageProcessor{
    /**
     * Regex for paginated list URLs.
     *
     * <p>Uses {@code \d+} rather than a single {@code \d}: the regex is also used to
     * extract links from the page, and the selector yields the matched text, so a
     * single-digit pattern would cut {@code /page/12} down to {@code /page/1} and list
     * pages past the ninth would never be queued.
     */
    public static final String URL_LIST = "https://www\\.yuanjilu\\.xyz/page/\\d+";

    /** Regex for article (detail) page URLs. */
    public static final String URL_POST = "https://www\\.yuanjilu\\.xyz/article/\\w+\\.html";

    /**
     * Handles one downloaded page: list pages queue more URLs, anything else is
     * extracted as an article.
     *
     * @param page the downloaded page to process
     */
    @Override
    public void process(Page page) {
        if (page.getUrl().regex(URL_LIST).match()) {
            // List page: drill into further list pages and the article pages it links to.
            queueLinks(page);
        } else {
            // Article page.
            extractArticle(page);
        }
    }

    /** Queues every list-page and article-page link found on the given page. */
    private void queueLinks(Page page) {
        // Other pagination links.
        page.addTargetRequests(page.getHtml().links().regex(URL_LIST).all());
        // Article detail links.
        page.addTargetRequests(page.getHtml().links().regex(URL_POST).all());
    }

    /** Stores the url, title, content, author and date fields of an article page. */
    private void extractArticle(Page page) {
        page.putField("url", page.getUrl());
        page.putField("title", page.getHtml().xpath("//h1[@class='view-title']/text()"));
        page.putField("content", page.getHtml().xpath("//div[@class='view-content']"));
        // Author and date are read from the 1st and 3rd <span> of the meta block.
        page.putField("author", page.getHtml().xpath("//div[@class='view-meta']/span[1]/text()"));
        page.putField("date", page.getHtml().xpath("//div[@class='view-meta']/span[3]/text()"));
    }

    /**
     * Returns the first list page of the site.
     *
     * @return the URL of list page 1 (presumably the crawl entry point; the caller is
     *         not visible in this file)
     */
    @Override
    public String initTargetUrl() {
        return "https://www.yuanjilu.xyz/page/1";
    }

    /**
     * Manual entry point: creates a processor and calls {@code start()}, which is not
     * defined in this class (presumably inherited from {@code AbstractPageProcessor}).
     *
     * @param args unused
     */
    public static void main(String[] args) {
        www_yuanjilu_xyz processor = new www_yuanjilu_xyz();
        processor.start();
    }
}
